# DOMAIN: Botanical Research
• CONTEXT: University X is currently undergoing some research involving understanding the characteristics of plant and plant seedlings at
various stages of growth. They have already invested in curating sample images. They require automation that can create a
classifier capable of determining a plant's species from a photo.
• DATA DESCRIPTION: The dataset comprises images from 12 plant species.
Source: https://www.kaggle.com/c/plant-seedlings-classification/data.
• PROJECT OBJECTIVE: To create a classifier capable of determining a plant's species from a photo.
# --- Environment setup and imports -------------------------------------------
# Install Keras Tuner (notebook shell command) and mount Google Drive so the
# dataset stored under WORK_DIR is reachable from this Colab session.
!pip install keras-tuner
from google.colab import drive
drive.mount('/content/drive')
import os
# All relative paths below resolve against this assignment folder on Drive.
WORK_DIR ='/content/drive/My Drive/Colab/cv/week1-2/Assignment';
os.chdir(WORK_DIR)
from zipfile import ZipFile
import cv2
import pandas as pd
from random import randint
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential,Model, load_model
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout,Activation, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam, Adagrad
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau,ModelCheckpoint
from tensorflow.keras import regularizers
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.losses import CategoricalCrossentropy,SparseCategoricalCrossentropy
import tensorflow as tf
from tensorflow import keras
# NOTE(review): 'kerastuner' is the legacy import name; recent releases use
# 'keras_tuner' -- confirm against the keras-tuner version installed above.
from kerastuner import Hyperband
import kerastuner as kerastuner
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical, array_to_img
from PIL import Image
from PIL import ImageFilter
import random
from sklearn.decomposition import PCA
import tensorflow as tf
# Fix RNG seeds (python, TF, numpy) for repeatable sampling and initialisation.
seed = 42
random.seed(seed)
tf.random.set_seed(seed)
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
np.random.seed(seed)
A. Extract ‘plant-seedlings-classification.zip’ into new folder (unzipped) using python. [2 Marks]
# A. Extract the dataset archive into the current working directory (WORK_DIR).
PLANT_SEEDLING_PATH = './plant-seedlings-classification.zip'
# The context manager guarantees the archive handle is closed after extraction.
with ZipFile(PLANT_SEEDLING_PATH, 'r') as archive:
    archive.extractall()
root = WORK_DIR + '/plant-seedlings-classification/train'
cols = ['Image Name', 'class', 'Actual Image']
N = 4
# Collect rows in a plain list and build the DataFrame once at the end:
# DataFrame.append was deprecated and removed in pandas 2.0, and calling it
# per image is O(n^2) because every call copies the whole frame.
rows = []
for path, subdirs, files in os.walk(root):
    for name in files:
        if not name.startswith('.'):  # skip hidden files such as .DS_Store
            full_path = os.path.join(path, name)
            img = cv2.imread(full_path)
            # OpenCV loads BGR; convert to RGB for correct matplotlib display.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # The class label is the name of the directory holding the image.
            rows.append({'Image Name': name,
                         'class': os.path.basename(os.path.dirname(full_path)),
                         'Actual Image': img})
plant_seed_df = pd.DataFrame(rows, columns=cols)
plant_seed_df.head(5)
C. Write a function that will select n random images and display images along with its species. [4 Marks]
# generate index of each class
def generate_random_index(totalNo, total_no_of_records, label):
    """Return `totalNo` row indices: one random row per class (for up to
    `totalNo` classes), topped up with random rows when there are fewer
    classes than requested.

    Args:
        totalNo: number of indices to return.
        total_no_of_records: total number of rows in the dataset.
        label: per-row class labels.
    """
    random_index = []
    uniqueLabel = np.unique(label)
    for index, category in enumerate(uniqueLabel):
        if index >= totalNo:
            break
        # One random row belonging to this class. NOTE(review): this relies
        # on the module-level plant_seed_df being in sync with `label`.
        random_index.append(random.choice(plant_seed_df[plant_seed_df['class'] == category].index))
    while len(random_index) != totalNo:
        # Top up with random rows. randrange excludes the upper bound; the
        # original randint(0, total_no_of_records) was inclusive and could
        # return an out-of-range index equal to the dataset length.
        random_index.append(random.randrange(total_no_of_records))
    return random_index
# Display Image with label
def displayImage(input, y=None):
    """Show every image in `input` with its label as the x-axis caption.

    Accepts either a DataFrame (label in column 1, image in column 2) or an
    indexable collection of images with a parallel label sequence `y`.
    """
    if isinstance(input, pd.DataFrame):
        for row in range(len(input)):
            plt.imshow(input.iloc[row, 2])   # 'Actual Image' column
            plt.xlabel(input.iloc[row, 1])   # 'class' column
            plt.show()
        return
    for idx, image in enumerate(input):
        plt.imshow(image)
        plt.xlabel("Actual Label: " + str(y[idx]))
        plt.show()
# Select 5 random images (one per class where possible).
TOTAL_NUMBER_OF_RANDOM_IMAGE = 5
# Take 5 random row indices.
random_plant_seed_index= generate_random_index(TOTAL_NUMBER_OF_RANDOM_IMAGE, len(plant_seed_df), plant_seed_df['class'])
# Pick the corresponding records from the dataset.
random_plant_seed_df = plant_seed_df.iloc[random_plant_seed_index]
print('rows selected ')
# Show the selected rows as a table.
display(random_plant_seed_df)
# Plot each selected image with its species label.
displayImage(random_plant_seed_df)
Observations: the images have unequal dimensions (varying heights and widths).
A. Create X & Y from the DataFrame
# X: raw RGB image arrays; y: species labels (the training directory names).
X = plant_seed_df[['Actual Image']]
y = plant_seed_df[['class']]
B. Encode labels of the images
# Map the 12 species names to integer codes 0-11.
label_encoder = LabelEncoder()
y_le = label_encoder.fit_transform(y.values.ravel())
y_le
# One-hot encode the integer codes for categorical cross-entropy.
y_le = to_categorical(y_le, num_classes = 12)
# species name -> integer code, used later to decode predictions.
mapping = dict(zip(label_encoder.classes_,label_encoder.transform(label_encoder.classes_)))
mapping
Use the dictionary to look up each species name and its assigned numeric label.
C. Unify shape of all the images
# Resize the image into 256 x 256
def resize_images(img):
    """Return `img` as uint8, resized to 256x256 with bicubic interpolation."""
    as_uint8 = np.array(img).astype(np.uint8)
    return cv2.resize(as_uint8, (256, 256), interpolation=cv2.INTER_CUBIC)
# Save the resized (256x256) copies of every image into `images`.
images = [resize_images(img) for img in X['Actual Image']]
Using the randint function, select an image and check its shape. It should be 256 x 256.
# Valid row indices run 0 .. len-1 and randint's bounds are INCLUSIVE, so the
# original randint(1, len(plant_seed_df)) could return len(df) (IndexError)
# and could never select index 0.
rndm_index = randint(0, len(plant_seed_df) - 1)
print(rndm_index)
images[rndm_index].shape
# Normalise the image and scale it with 0 min value and 1 max value
def normalize(img):
    """Add a leading batch axis and scale pixel values from [0, 255] to [0, 1]."""
    batched = np.expand_dims(img, axis=0)
    return batched * 1 / 255.0
# NOTE(review): this re-applies resize_images rather than normalize(), so the
# pixels stay uint8 in [0, 255] (consistent with the range check below) and no
# batch axis is added. Confirm that skipping normalisation was intentional.
images = [resize_images(img) for img in images]
Checking the min and max of first image. It should be in 0 to 255 range
print('Min pixel : ' + str(np.min(images[0])) + ' and Max pixel : ' + str(np.max(images[0])))
A. Split the data into train and test data.
# 80:20 split, stratified on the species so class proportions match in both sets.
X_train, X_test, y_train, y_test = train_test_split(images, y_le, test_size=0.20, stratify=y, random_state=1)
# Print the shape of the first image and of the one-hot label array.
X_train[0].shape
y_train.shape
# Convert the python lists into numpy arrays for Keras.
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)
print("images shape: ", X_train.shape)
print("classes shape: ", y_train.shape)
# Reduce overfitting with data augmentation: random rotations, shifts, shears,
# zooms and flips of the training images.
train_datagen = ImageDataGenerator(rotation_range=180,
                                   zoom_range = 0.1,
                                   width_shift_range=0.5,
                                   height_shift_range=0.5,
                                   shear_range=0.2,
                                   horizontal_flip=True,
                                   vertical_flip=True,fill_mode='nearest')
# fit() is only required for featurewise statistics; none are enabled here.
train_datagen.fit(X_train)
tf.keras.backend.clear_session()
B. Create new CNN architecture to train the model
tf.keras.backend.clear_session()
# Stop when validation loss has not improved for 7 epochs.
stop_early = EarlyStopping(monitor='val_loss', mode='min', patience=7)
# Shrink the learning rate by 10x after 5 stagnant epochs. The original passed
# model='auto' -- a typo for mode='auto', which is not a valid argument.
reduce_lr = ReduceLROnPlateau(monitor='val_loss',factor=0.1,patience=5,min_lr=0.00001,mode='auto')
def build_model(hp):
    """CNN builder for Keras Tuner (plant seedlings): 1-6 tunable conv blocks
    (BN -> Conv(relu) -> BN -> MaxPool), global average pooling, 1-4 tunable
    dense blocks (BN -> Dropout -> Dense), and a 12-way softmax head.
    Returns a compiled model."""
    model=Sequential();
    model.add(tf.keras.Input(shape=(256,256,3)))
    # Convolutional stage: depth, filters, kernel size and padding are all
    # searched by the tuner.
    for layer in range(hp.Int('num_conv_layer', 1,6)):
        model.add(BatchNormalization())
        model.add(Conv2D(filters = hp.Int('conv '+str(layer+1)+'_filter', min_value=32, max_value=128, step=16),kernel_size=hp.Choice('Conv_Kernel_'+str(layer+1), [3,5,7]), padding=hp.Choice('Conv_padding_'+str(layer+1), ['valid','same']) ,
                         activation= "relu" ))
        model.add(BatchNormalization())
        model.add(MaxPooling2D())
    # Global average pooling collapses spatial dims before the dense stage.
    model.add(GlobalAveragePooling2D())
    # Dense stage: depth, dropout rate and width are searched by the tuner.
    for layer in range(hp.Int('num__dense_layer', 1,4)):
        model.add(BatchNormalization())
        model.add(Dropout(rate=hp.Float('dropout_' + str(layer + 1) ,
                                        min_value=0.0,
                                        max_value=0.5,
                                        step=0.1)))
        model.add(Dense(units=hp.Int("Units_"+str(layer+1), min_value=32, max_value=512, step=64), activation= "relu", kernel_initializer='he_uniform'))
    model.add(BatchNormalization())
    # 12 output classes (one per seedling species).
    model.add(Dense(units=12,activation='softmax'))
    model.compile(optimizer='adam',
                  loss=CategoricalCrossentropy(),
                  metrics=['accuracy'])
    return model
# Hyperband search over the CNN space; 'val_accuracy' is the metric name that
# metrics=['accuracy'] logs, so the objective matches.
tuner = Hyperband(build_model, objective=kerastuner.Objective("val_accuracy", direction="max"), max_epochs=10, factor=2, hyperparameters=None, directory = 'CNN_project',project_name = 'Q1',overwrite=True)
tuner.search_space_summary()
# tuner.search(train_datagen.flow(X_train,y_train, batch_size=60),validation_data=(X_test,y_test), callbacks=[reduce_lr, stop_early], verbose=2,steps_per_epoch=(X_train.shape[0]/60),use_multiprocessing=True,workers=6)
#tuner.results_summary()
# best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]
# model = tuner.hypermodel.build(best_hps)
# model.save('Hypertuned_CNN_Prob11.h5')
# The commented code above ran the search once and saved the best model; since
# the search is very slow, the saved model is simply reloaded here.
model = load_model('Hypertuned_CNN_Prob11.h5')
model.summary()
# Train the tuned model for 30 epochs on the augmented training data.
model_checkpoint = ModelCheckpoint('Hyptun_plantspecies_CNN_model.h5', save_best_only=True, monitor='val_accuracy', mode='max', verbose=1)
history = model.fit(train_datagen.flow(X_train,y_train, batch_size=60),validation_data=(X_test,y_test), callbacks=[reduce_lr, stop_early, model_checkpoint], epochs=30)
# Plot the loss and accuracy curves for training and validation
fig, ax = plt.subplots(2,1)
ax[0].plot(history.history['loss'], color='b', label="Training loss")
ax[0].plot(history.history['val_loss'], color='r', label="validation loss",axes =ax[0])
legend = ax[0].legend(loc='best', shadow=True)
ax[1].plot(history.history['accuracy'], color='b', label="Training accuracy")
ax[1].plot(history.history['val_accuracy'], color='r',label="Validation accuracy")
legend = ax[1].legend(loc='best', shadow=True)
# Final evaluation on the held-out test split.
eval_result = model.evaluate(X_test, y_test)
print("[test loss, test accuracy]:", eval_result)
# Pick a random test image. randint is inclusive on both ends, so the upper
# bound must be len(X_test) - 1 (the original randint(0, len(X_test)) could
# index one past the end and raise IndexError).
random_number = randint(0, len(X_test) - 1)
img = X_test[random_number]
actual_label = y_test[random_number]
plt.imshow(img)
# The model expects a batch dimension: (1, 256, 256, 3).
img = np.expand_dims(img, axis = 0)
#img =img*1/255.0
#Check the size of the Image array again
print('After expand_dims: '+ str(img.shape))
result = model.predict(img)
# Predicted class = index of the highest softmax probability.
predicted_index = np.argmax(result[0]);
# Invert the name->code mapping to recover the species name.
key_list = list(mapping.keys())
val_list = list(mapping.values())
position = val_list.index(predicted_index)
predicted = key_list[position]
actual = key_list[np.argmax(actual_label, axis=None, out=None)]
plt.suptitle("Actual label "+ actual +" ,,,,, Predicted label "+predicted)
plt.show()
Removing the variables to free RAM
tf.keras.backend.clear_session()
# Free the part-1 (seedlings) intermediates before starting part 2 (flowers).
del plant_seed_df, random_plant_seed_index,random_plant_seed_df,X,y, y_le,mapping, rndm_index, images, X_train, X_test, y_train, y_test ,train_datagen, tuner,model, random_number, predicted, actual, key_list,val_list
DOMAIN: Botanical Research • CONTEXT: University X is currently undergoing some research involving understanding the characteristics of flowers. They have already invested in curating sample images. They require automation that can create a classifier capable of determining a flower’s species from a photo. • DATA DESCRIPTION: The dataset comprises images from 17 flower species.
A. Import and read oxflower17 dataset from tflearn and split into X and Y while loading.
!pip install tflearn
from tflearn.datasets import oxflower17
# X: flower image array; y: integer class labels (17 species).
X,y = oxflower17.load_data();
print("Total Number of images " , len(X))
Shape of Images
records, height, width, channel = X.shape
print("Shape" , X.shape)
print("Total number of records ", records)
print(f"pixel {height} x {width} x {channel}")
# Inspect the label set and its size.
unique = np.unique(y)
print("unique values are ", unique)
print("Total number of class are ", len(unique))
print("Each class images are ")
# Per-class image counts.
classRecord = pd.DataFrame(y).value_counts()
# NOTE(review): value_counts() returns a Series, so assigning .columns here
# has no effect on the displayed output -- confirm intent.
classRecord.columns =['Class', 'No of Records']
classRecord
plt.figure(figsize=(10,10))
classRecord.plot(kind='barh')
plt.xlabel('Count')
plt.ylabel('Label')
A. Display 5 random images
# generate index of each class
def generate_random_index(totalNo, total_no_of_records, label):
    """Return `totalNo` row indices: one random row per class (for up to
    `totalNo` classes), topped up with random rows when there are fewer
    classes than requested.

    Args:
        totalNo: number of indices to return.
        total_no_of_records: total number of rows/images.
        label: per-row class labels (any 1-D array-like).
    """
    random_index = []
    label = pd.DataFrame(label)
    label.columns = ['label']
    uniqueLabel = np.unique(label)
    for index, category in enumerate(uniqueLabel):
        if index >= totalNo:
            break
        # One random row index belonging to this class.
        random_index.append(random.choice(label[label['label'] == category].index))
    while len(random_index) != totalNo:
        # Top up with random rows. randrange excludes the upper bound; the
        # original randint(0, total_no_of_records) was inclusive and could
        # return an out-of-range index equal to the dataset length.
        random_index.append(random.randrange(total_no_of_records))
    return random_index
# A. Display 5 random flower images with their labels.
TOTAL_NUMBER_OF_RANDOM_IMAGE = 5
random_flower_index= generate_random_index(TOTAL_NUMBER_OF_RANDOM_IMAGE, len(X), y)
random_flower_X = X[random_flower_index]
random_flower_y = y[random_flower_index]
print('rows selected ')
displayImage(random_flower_X, random_flower_y)
# Pick a single random image to use for the filter demonstrations below.
TOTAL_NUMBER_OF_RANDOM_IMAGE =1
random_flower_index= generate_random_index(TOTAL_NUMBER_OF_RANDOM_IMAGE, len(X), y)
random_flower_X = X[random_flower_index]
random_flower_y = y[random_flower_index]
display(random_flower_X[0].shape)
displayImage(random_flower_X, random_flower_y)
# Greyscale conversion via the ITU-R BT.601 luma coefficients.
rgb_weights = [0.2989, 0.5870, 0.1140]
grayscale_image = np.dot(random_flower_X[...,:3], rgb_weights)
#plt.imshow(tf.squeeze(grayscale_image))
display(grayscale_image.shape)
# squeeze drops the leading batch axis before plotting.
plt.imshow(np.squeeze(grayscale_image),cmap='gray')
D. Apply a filter to sharpen the image and display the image before and after sharpening.
# Convert the array to a PIL image so PIL filters can be applied.
img_pil = tf.keras.utils.array_to_img(random_flower_X[0])
# Apply sharp filter
sharpened1 = img_pil.filter(ImageFilter.SHARPEN);
# Original and sharpened versions side by side.
fig, (ax1,ax2) = plt.subplots(1,2 ,figsize= (30,10))
ax1.imshow(img_pil)
ax1.set_xlabel("Original Flower Image")
ax2.imshow(sharpened1)
ax2.set_xlabel("Sharpen Flower Image");
# Apply a blur filter (the original comment and axis label both wrongly said
# "sharpen" for this blurred image).
blur1 = img_pil.filter(ImageFilter.BLUR);
fig, (ax1,ax2) = plt.subplots(1,2 ,figsize= (30,10))
ax1.imshow(img_pil)
ax1.set_xlabel("Original Flower Image")
ax2.imshow(blur1)
ax2.set_xlabel("Blurred Flower Image")
F. Display all the 4 images from above questions besides each other to observe the difference. [1 Marks]
# F. Original, greyscale, sharpened and blurred versions side by side.
fig, (ax1,ax2,ax3,ax4) = plt.subplots(1,4 ,figsize= (30,10))
ax1.imshow(img_pil)
ax1.set_xlabel("Original Flower Image")
ax2.imshow(np.squeeze(grayscale_image),cmap='gray')
ax2.set_xlabel("Gray Flower Image")
ax3.imshow(sharpened1)
ax3.set_xlabel("Sharpening Flower Image")
ax4.imshow(blur1)
ax4.set_xlabel("Blur Flower Image");
# Printing one sample image per category.
plt.figure(figsize = (20,20))
col =1
for flower_label in np.unique(y):
    plt.subplot(5,4,col)
    # Index of the first image carrying this label. The original plotted
    # X[i] where i was the LABEL VALUE itself, which displays the first 17
    # images of the dataset rather than one image from each class.
    sample_index = int(np.where(y == flower_label)[0][0])
    plt.imshow(X[sample_index])
    plt.title(f"Flower Label: {y[sample_index]}",{'fontsize':15})
    plt.tick_params(top=False,bottom=False,right=False,labelleft = False,labelbottom=False)
    col = col+1
plt.show()
A. Split the data into train and test with 80:20 proportion
# 80:20 split. NOTE(review): unlike the seedlings split, no stratify= is used
# here, so class proportions may differ slightly between train and test.
X_train, X_test, y_train ,y_test = train_test_split(X,y , test_size=0.20, random_state=seed)
B. Train a model using any Supervised Learning algorithm and share performance metrics on test data
result = {}  # model name -> [loss, accuracy, fitted model]

# Flatten the layer
def flatten(X):
    """Split each RGB image into three flattened per-channel matrices.

    Args:
        X: array-like of shape (n, H, W, 3).

    Returns:
        Tuple (Xr, Xg, Xb), each of shape (n, H*W) -- one row per image.
    """
    # Vectorised with numpy instead of a per-image cv2.split loop: identical
    # output (channel k of image i, flattened row-major) in a single pass.
    arr = np.asarray(X)
    n = arr.shape[0]
    Xr = arr[..., 0].reshape(n, -1)
    Xg = arr[..., 1].reshape(n, -1)
    Xb = arr[..., 2].reshape(n, -1)
    return (Xr, Xg, Xb)
X_train_r, X_train_g, X_train_b = flatten(X_train)
X_test_r, X_test_g, X_test_b = flatten(X_test)
X_train_r.shape, X_train_g.shape, X_train_b.shape,
X_test_r.shape, X_test_g.shape, X_test_b.shape,
n_components = 200
# Fit one PCA per colour channel on the TRAINING data only. The original code
# also fitted separate PCAs on the test set and projected the test data
# through those, which puts train and test features in different bases (and
# leaks test-set statistics). Test data must be transformed with the
# train-fitted PCAs so the classifier sees a consistent feature space.
X_train_r_pca = PCA(n_components=n_components, whiten=True).fit(X_train_r)
X_train_g_pca = PCA(n_components=n_components, whiten=True).fit(X_train_g)
X_train_b_pca = PCA(n_components=n_components, whiten=True).fit(X_train_b)
X_train_r_pca.explained_variance_ratio_.sum(), X_train_g_pca.explained_variance_ratio_.sum(), X_train_b_pca.explained_variance_ratio_.sum()
Xr_train_pca = X_train_r_pca.transform(X_train_r)
Xg_train_pca = X_train_g_pca.transform(X_train_g)
Xb_train_pca = X_train_b_pca.transform(X_train_b)
Xr_test_pca = X_train_r_pca.transform(X_test_r)
Xg_test_pca = X_train_g_pca.transform(X_test_g)
Xb_test_pca = X_train_b_pca.transform(X_test_b)
# Recombine the three channel projections into single feature matrices.
X_train_pca = np.concatenate([Xr_train_pca,Xg_train_pca,Xb_train_pca], axis=1)
X_test_pca = np.concatenate([Xr_test_pca,Xg_test_pca,Xb_test_pca], axis=1)
X_train_pca.shape, y_train.shape,
X_test_pca.shape, y_test.shape,
# Small grid over forest size and leaf size for the Random Forest baseline.
param_grid=[
    {'n_estimators':[100,200],
     'min_samples_leaf':[2,3]}
]
rf = RandomForestClassifier()
clf = GridSearchCV(rf, param_grid, verbose=10, n_jobs=-1)
clf.fit(X_train_pca,y_train)
rf_score = clf.score(X_test_pca, y_test)
y_pred = clf.predict(X_test_pca)
rf_accuracy = metrics.accuracy_score(y_test, y_pred)
# The original message said "The predicted label is", but the printed value is
# the test-set accuracy.
print(f"Random Forest test accuracy: '{rf_accuracy}'")
result['Random Forest'] = ['NA', rf_accuracy, clf]  # RF has no loss metric
del X_train_pca,X_test_pca  # free memory
C. Train a model using Neural Network and share performance metrics on test data
# Sanity-check the split shapes before building the neural networks.
print('Shape of X_train ' ,X_train.shape)
print('Shape of y_train ' ,y_train.shape)
print('Shape of X_test ' ,X_test.shape)
print('Shape of y_test ' ,y_test.shape)
Convert the label into desired CNN format
num_of_classes=len(np.unique(y_train))
print(num_of_classes)
# One-hot encode the labels for categorical cross-entropy.
y_train_cal = to_categorical(y_train,num_classes=num_of_classes)
y_test_cal = to_categorical(y_test, num_classes=num_of_classes)
# Augmentation pipeline for the flower images (heavier shifts/shears than in
# part 1); the test generator applies no augmentation.
train_datagen = ImageDataGenerator(rotation_range=180,
                                   zoom_range=0.3,
                                   width_shift_range=0.3,
                                   height_shift_range=0.3,
                                   shear_range=0.3,
                                   horizontal_flip=True,
                                   vertical_flip=True,
                                   fill_mode='nearest',
                                   )
test_datagen = ImageDataGenerator()
# fit() is only required for featurewise statistics; none are enabled here.
train_datagen.fit(X_train)
#test_datagen.fit(X_test)
batch_size = 64
# NOTE(review): train_datagen is rebound here from the generator object to its
# flow() iterator; later cells pass it straight to model.fit / tuner.search.
train_datagen = train_datagen.flow(X_train, y_train_cal, batch_size=batch_size)
validation_set = test_datagen.flow(X_test, y_test_cal, batch_size=batch_size)
from keras.backend import reshape   # NOTE(review): imported but unused below
from tensorflow import keras
tf.keras.backend.clear_session()
def build_model(hp):
    """Fully-connected (ANN) builder for Keras Tuner: flattens the 224x224x3
    input, stacks 1-4 tunable blocks (BN -> Dense -> Dropout), and ends in a
    num_of_classes-way softmax head. Returns a compiled model."""
    model = Sequential()
    model.add(keras.layers.InputLayer(input_shape=(224, 224, 3)))
    model.add(Flatten())
    # Dense stage: depth, width and dropout rate are searched by the tuner.
    for layer in range(hp.Int('num_layer', 1,4)):
        model.add(BatchNormalization())
        model.add(Dense(units=hp.Int("Units_"+str(layer+1), min_value=32, max_value=1024, step=32), activation= hp.Choice('activation_' +str(layer+1), ["relu"]), kernel_initializer='he_uniform' ))
        model.add(Dropout(rate=hp.Float('dropout_' + str(layer + 1) ,
                                        min_value=0.0,
                                        max_value=0.5,
                                        step=0.1)))
    model.add(BatchNormalization())
    model.add(Dense(units=num_of_classes, activation='softmax'))
    # Learning rate searched on a log scale.
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-1, sampling="log")
    model.compile(optimizer=Adam(learning_rate=learning_rate),loss=CategoricalCrossentropy(),metrics=['accuracy'])
    return model
# Hyperband search over the ANN hyperparameters. The model is compiled with
# metrics=['accuracy'], so Keras logs the validation metric as 'val_accuracy';
# the original objective name 'val_acc' matches no logged metric.
tuner = Hyperband(build_model, objective=kerastuner.Objective("val_accuracy", direction="max"), max_epochs=15, factor=2, hyperparameters=None, directory = 'Flower_NN_Q2',project_name = 'Tuning_MODEL_NN',overwrite=True)
tuner.search_space_summary()
stop_early = EarlyStopping(monitor='val_loss', mode='min', patience=7)
# mode='auto' (the original passed the typo model='auto', not a valid argument).
reduce_lr = ReduceLROnPlateau(monitor='val_loss',factor=0.1,patience=5,min_lr=0.00001,mode='auto', verbose=1)
tuner.search(train_datagen,validation_data=(X_test,y_test_cal), steps_per_epoch=X_train.shape[0]/ batch_size)
# Retrieve the best model found by the search, build it and save it to disk.
best_ANN_model = tuner.get_best_models()[0]
best_ANN_model.build(X_train.shape)
# Saving model into the memory
best_ANN_model.save('Hypertuned_ANN_Flower_Prob2.h5')
best_ANN_model.summary()
It took about 60 minutes to hypertune the model. The model has been saved so it can be reused without re-running the hyperparameter search.
# Reload the previously saved hypertuned ANN from disk.
best_ANN_model = load_model('Hypertuned_ANN_Flower_Prob2.h5')
best_ANN_model.summary()
# Train the model with 50 epochs.
# Keras logs 'val_accuracy' (not 'val_acc') when compiled with
# metrics=['accuracy']; with the original monitor='val_acc' the checkpoint
# could never find its metric and would not save the best weights.
checkpoint_nn = ModelCheckpoint("tflearn_ANN_Flower.h5",monitor='val_accuracy', mode='max',verbose=1, save_best_only=True)
history_ANN_hypertuned = best_ANN_model.fit(train_datagen, validation_data=(X_test,y_test_cal), callbacks=[reduce_lr,stop_early, checkpoint_nn], epochs=50, verbose=2, steps_per_epoch=X_train.shape[0]/ 64)
# Predicted class = argmax over the softmax outputs.
y_pre = best_ANN_model.predict(X_test)
y_pred_final=[]
for i in y_pre:
    y_pred_final.append(np.argmax(i))
loss, accuracy = best_ANN_model.evaluate(X_test, y_test_cal)
result['ANN'] = [loss, accuracy, best_ANN_model]
print('loss in Testing data ', loss)
print('Accuracy in Testing data ', accuracy)
Accuracy is 44%. Let's try a CNN.
# Printing the confusion matrix.
# NOTE: arguments are (pred, truth), so rows are predictions and columns are
# the truth -- consistent with the axis labels below (x = Truth, y = Predicted).
cm = confusion_matrix(y_pred_final,y_test)
plt.figure(figsize=(10,7))
sns.heatmap(cm,annot=True,fmt='d')
plt.xlabel('Truth')
plt.ylabel('Predicted')
print(classification_report(y_test, y_pred_final))
# F1 score is low for each class. Overall accuracy is 44%.
# displaying the Accuracy and Loss of model
def show_final_history(history):
    """Plot training/validation loss (left) and accuracy (right).

    Args:
        history: Keras History object returned by model.fit().
    """
    # Keras 2.3+ logs 'accuracy'/'val_accuracy'; only very old versions used
    # 'acc'/'val_acc'. The original indexed 'acc' directly, which raises
    # KeyError on any modern Keras; pick whichever key is present.
    acc_key = "accuracy" if "accuracy" in history.history else "acc"
    val_acc_key = "val_" + acc_key
    fig, ax = plt.subplots(1,2,figsize=(15,5))
    ax[0].set_title("Loss")
    ax[1].set_title("Accuracy")
    ax[0].plot(history.history["loss"],label="Loss")
    ax[0].plot(history.history["val_loss"],label="Test Loss")
    ax[1].plot(history.history[acc_key],label="Accuracy")
    ax[1].plot(history.history[val_acc_key],label="Test Accuracy")
    ax[0].legend(loc="upper right")
    ax[1].legend(loc="lower right")
show_final_history(history_ANN_hypertuned)
D. Train a model using a basic CNN and share performance metrics on test data
tf.keras.backend.clear_session()
# Hypertuning CNN model
def build_model(hp):
    """CNN builder for Keras Tuner (flowers): 1-5 tunable conv blocks
    (BN -> Conv(relu) -> BN -> MaxPool), global average pooling, 1-4 tunable
    dense blocks (BN -> Dropout -> Dense), and a num_of_classes-way softmax
    head. Returns a compiled model."""
    model=Sequential();
    model.add(tf.keras.Input(shape=(224,224,3)))
    # Convolutional stage: depth, filters, kernel size and padding are all
    # searched by the tuner.
    for layer in range(hp.Int('num_conv_layer', 1,5)):
        model.add(BatchNormalization())
        model.add(Conv2D(filters = hp.Int('conv '+str(layer+1)+'_filter', min_value=32, max_value=128, step=16),kernel_size=hp.Choice('Conv_Kernel_'+str(layer+1), [3,5,7]), padding=hp.Choice('Conv_padding_'+str(layer+1), ['valid','same']) ,
                         activation= "relu" ))
        model.add(BatchNormalization())
        model.add(MaxPooling2D())
    # Global average pooling collapses spatial dims before the dense stage.
    model.add(GlobalAveragePooling2D())
    # Dense stage: depth, dropout rate and width are searched by the tuner.
    for layer in range(hp.Int('num__dense_layer', 1,4)):
        model.add(BatchNormalization())
        model.add(Dropout(rate=hp.Float('dropout_' + str(layer + 1) ,
                                        min_value=0.0,
                                        max_value=0.5,
                                        step=0.1)))
        model.add(Dense(units=hp.Int("Units_"+str(layer+1), min_value=32, max_value=512, step=64), activation= "relu", kernel_initializer='he_uniform'))
    model.add(BatchNormalization())
    model.add(Dense(units=num_of_classes,activation='softmax'))
    # Learning rate searched on a log scale.
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-1, sampling="log")
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss=CategoricalCrossentropy(),
                  metrics=['accuracy'])
    return model
# Hyperband search for the CNN. As with the ANN, the logged validation metric
# name is 'val_accuracy' when compiled with metrics=['accuracy']; the original
# objective 'val_acc' matched no logged metric.
tuner = Hyperband(build_model, objective=kerastuner.Objective("val_accuracy", direction="max"), max_epochs=15, factor=2, hyperparameters=None, directory = 'Flower_NN_Q2',project_name = 'TUNING_MODEL_CNN',overwrite=True)
tuner.search_space_summary()
print("images shape: ", X_train.shape)
print("classes shape: ", y_train.shape)
# The Hyperband search below is very slow; it was run once, the best model was
# saved, and the saved file is simply reloaded here.
# tuner.search(train_datagen,
#              steps_per_epoch=(X_train.shape[0]/batch_size),
#              validation_data=(X_test, y_test_cal)
#              )
# best_CNN_model = tuner.get_best_models()[0]
# best_CNN_model.build(X_train.shape)
# best_CNN_model.summary()
# best_CNN_model.save('Hypertuned_Flower_CNN_Prob2.h5')
# best_CNN_model.summary()
best_CNN_model = load_model('Hypertuned_Flower_CNN_Prob2.h5')
best_CNN_model.summary()
# Training the model with 50 epochs.
# Monitor 'val_accuracy': that is the name Keras logs for
# metrics=['accuracy']; the original 'val_acc' would never match, so the
# checkpoint would never save the best weights.
checkpoint_cnn = ModelCheckpoint("tflearn_CNN_.h5",monitor='val_accuracy',
                                 mode='max',verbose=1, save_best_only=True)
history_hypertuned = best_CNN_model.fit(train_datagen, validation_data=(X_test, y_test_cal), callbacks=[reduce_lr, stop_early,checkpoint_cnn], epochs=50, verbose=2)
#history_hypertuned = best_CNN_model.fit(X_train, y_train_cal, validation_data=(X_test, y_test_cal), callbacks=[reduce_lr, stop_early,checkpoint_cnn], epochs=100, verbose=2, batch_size=64)
# Evaluate the tuned CNN on the test set; predicted class = argmax of softmax.
y_pre = best_CNN_model.predict(X_test)
y_pred_final=[]
for i in y_pre:
    y_pred_final.append(np.argmax(i))
loss, accuracy = best_CNN_model.evaluate(X_test, y_test_cal)
result['CNN'] = [loss, accuracy, best_CNN_model]
print('loss in Testing data ', loss)
print('Accuracy in Testing data ', accuracy)
# Confusion matrix with (pred, truth) argument order -- matching the axis
# labels below (x = Truth, y = Predicted).
cm = confusion_matrix(y_pred_final,y_test)
plt.figure(figsize=(10,7))
sns.heatmap(cm,annot=True,fmt='d')
plt.xlabel('Truth')
plt.ylabel('Predicted')
print(classification_report(y_test, y_pred_final))
show_final_history(history_hypertuned)
Training loss is lower than the test loss.
# Print the metrics of all models so the best one can be chosen.
# [:,:-1] drops the model-object column, keeping only [loss, accuracy].
result1 = pd.DataFrame(np.array(list(result.values()))[:,:-1], # make a dataframe out of the metrics from result dictionary
                       columns= ['Loss','accuracy'],
                       index= result.keys()) # use the model names as index
result1.index.name = 'Model' # name the index of the result1 dataframe as 'Model'
result1
Observation:
E. Predict the class/label of image ‘Prediction.jpg’ using best performing model and share predicted label.
model = result['CNN'][2]   # best performing model from the comparison table
img = WORK_DIR + '/Prediction.jpg'
predicted_img = cv2.imread(img)
predicted_img = cv2.cvtColor(predicted_img, cv2.COLOR_BGR2RGB)
# The network was trained on 224x224 inputs; resize before predicting (the
# original skipped this step, so any other image size would make predict fail).
predicted_img = cv2.resize(predicted_img, (224, 224))
# Scale to [0, 1]. NOTE(review): assumes the training images were also in
# [0, 1] -- confirm against oxflower17.load_data.
predicted_img = predicted_img/255
predicted_img = np.expand_dims(predicted_img, axis = 0)   # add batch dimension
print('After expand_dims: '+ str(predicted_img.shape))
# Use a separate name so the module-level `result` metrics dict is not
# clobbered by the prediction output (the original reassigned `result` here).
pred_probs = model.predict(predicted_img)
predicted_index = np.argmax(pred_probs[0])
plt.imshow(np.squeeze(predicted_img))
plt.suptitle('Predicted label '+str(predicted_index))
plt.show();
Observation: the image is predicted as label 2.